##Setup and Import Data

#Std Libraries
library(openair); #vis pkg
library(tidyverse); #helps vis pkg
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.0     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(worldmet); #met pkg

####Get Germany AQ Data
library(dplyr); #data manip fxns
library(saqgetr); #euro pkg

#view all sites and sort down to desired location
sites_dat <- get_saq_sites();
de_sites <- filter(sites_dat, country == 'germany');
berlin_sites <- filter(de_sites, latitude > 52.3 & latitude < 52.5 & longitude > 13.4 & longitude < 13.6)

#see this
head(de_sites)
## # A tibble: 6 x 16
##   site    site_name        latitude longitude elevation country country_iso_code
##   <chr>   <chr>               <dbl>     <dbl>     <dbl> <chr>   <chr>           
## 1 debb001 Burg (Spreewald)     51.8      14.1        54 germany DE              
## 2 debb003 Brandenburg a.d…     52.4      12.5        33 germany DE              
## 3 debb006 Cottbus-Süd          51.7      14.3        76 germany DE              
## 4 debb007 Elsterwerda          51.5      13.5        89 germany DE              
## 5 debb008 Finsterwalde         51.6      13.7       110 germany DE              
## 6 debb009 Forst                51.7      14.6        75 germany DE              
## # … with 9 more variables: site_type <chr>, site_area <chr>, date_start <dttm>,
## #   date_end <dttm>, network <chr>, eu_code <chr>, eoi_code <chr>,
## #   observation_count <dbl>, data_source <chr>
#let's test with Blankenfelde-Mahlow
prac_dat <- get_saq_observations(
    site = "debb086",
    start = 2011,
    end = 2015,
    verbose = TRUE
)
## 2022-01-26 18:26:14.818 EST: Loading `air_quality_data_site_debb086_2011.csv.gz`...
## 2022-01-26 18:26:15.289 EST: Loading `air_quality_data_site_debb086_2012.csv.gz`...
## 2022-01-26 18:26:15.441 EST: Loading `air_quality_data_site_debb086_2013.csv.gz`...
## 2022-01-26 18:26:15.880 EST: Loading `air_quality_data_site_debb086_2014.csv.gz`...
## 2022-01-26 18:26:16.757 EST: Loading `air_quality_data_site_debb086_2015.csv.gz`...
####Get Germany MET Data
#first find a station near the lat/long of the aq site
#the next fxn produces a map, don't run if not needed

#getMeta(lat = 52.54, lon = 13.34, returnMap = TRUE)

#there is only one site for Berlin, so we'll take that 

de_met <- importNOAA(code = "103850-99999", year = 2011:2015);

#you can then merge the data together by date as noted in documentation

#OPTION 1: subset and pivot
#cut irrelevant rows
new_prac_dat <- subset(prac_dat, select = -c(date_end, site, validity, summary, unit));

#first mutate the extracted pollutant data
prac_dat2 <- pivot_wider(
  new_prac_dat,
  id_cols = c(date, process),
  names_from = variable,
  values_from = value,
  values_fill = NA
)
#not quite perfect but very close. What are the different processes and can they be averaged?

#OPTION 2: saq_clean

prac_dat3 <- prac_dat %>% 
  saq_clean_observations(summary = "hour", valid_only = TRUE, spread = TRUE);

#that works well but the processes disappeared

####Append and Confirm
#now append the data
aq_berlin1 <- left_join(
    de_met,
    prac_dat3,
    by = "date"
)

#compare to basic data from text example as this has the ideal formatting
base_dat <- importAURN(site = "my1", year = 2000:2005);

head(base_dat)
## # A tibble: 6 x 13
##   site       code  date                   co   nox   no2    no    o3   so2  pm10
##   <chr>      <fct> <dttm>              <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 London Ma… MY1   2000-01-01 00:00:00   2.6   388    78   203     2    16    55
## 2 London Ma… MY1   2000-01-01 01:00:00   8.4   886    84   525     4    35    96
## 3 London Ma… MY1   2000-01-01 02:00:00   7.4   816   117   458     4    32    91
## 4 London Ma… MY1   2000-01-01 03:00:00   4.5   636    97   353     0    24    73
## 5 London Ma… MY1   2000-01-01 04:00:00   3.4   483    74   268     0    19    55
## 6 London Ma… MY1   2000-01-01 05:00:00   1.9   231    65   109     0    11    31
## # … with 3 more variables: pm2.5 <dbl>, ws <dbl>, wd <dbl>
head(aq_berlin1)
## # A tibble: 6 x 31
##   code         station  date                latitude longitude  elev    ws    wd
##   <fct>        <fct>    <dttm>                 <dbl>     <dbl> <dbl> <dbl> <dbl>
## 1 103850-99999 SCHONEF… 2011-01-01 00:00:00     52.4      13.5  47.8  7.7   255.
## 2 103850-99999 SCHONEF… 2011-01-01 01:00:00     52.4      13.5  47.8  9.55  260 
## 3 103850-99999 SCHONEF… 2011-01-01 02:00:00     52.4      13.5  47.8  9     260 
## 4 103850-99999 SCHONEF… 2011-01-01 03:00:00     52.4      13.5  47.8  9.25  254.
## 5 103850-99999 SCHONEF… 2011-01-01 04:00:00     52.4      13.5  47.8 10.3   250 
## 6 103850-99999 SCHONEF… 2011-01-01 05:00:00     52.4      13.5  47.8 10.3   240 
## # … with 23 more variables: air_temp <dbl>, atmos_pres <dbl>, visibility <dbl>,
## #   dew_point <dbl>, RH <dbl>, ceil_hgt <dbl>, cl_1 <dbl>, cl_2 <dbl>,
## #   cl_3 <dbl>, cl <dbl>, cl_1_height <dbl>, cl_2_height <dbl>,
## #   cl_3_height <dbl>, pwc <chr>, date_end <dttm>, site <chr>, co <dbl>,
## #   no <dbl>, no2 <dbl>, nox <dbl>, o3 <dbl>, pm10 <dbl>, pm2.5 <dbl>

Practicing Plots with my1

###Wind Rose

#plot a windrose for everything
windRose(aq_berlin1)

#plot by year
windRose(aq_berlin1, type = "year", layout = c(3, 2))

#plot by pm10 (the type function is useful!)
windRose(aq_berlin1, type = "pm10", layout = c(3, 2))
## Warning: removing 17903 missing rows due to pm10

###Pollution Rose

#basic example for nox
pollutionRose(aq_berlin1, pollutant = "nox")

#link with s02 using type
pollutionRose(aq_berlin1, pollutant = "nox", type = "no2", layout = c(4, 1))
## Warning: removing 17587 missing rows due to no2

#segment and normalize
pollutionRose(aq_berlin1, pollutant = "nox", seg = 1, normalise = TRUE)

    #seg = 1 removes the spaces between the bars
    #normalize makes the length of all bars the same to see pollutant proportions more clearly

###Polar Frequencies

#Basic example, a more detailed wind description
polarFreq(aq_berlin1)

#bin by year
polarFreq(aq_berlin1, type = "year")

#add pollutant
polarFreq(aq_berlin1, type = "year", pollutant = "pm10", statistic = "mean", min.bin = 2)

# we can see in detail how high concentrations came from SE in 2000, but moved north and decreased in frequency starting in 2003

###Percentile Rose

#basic example, the concentrations of the pollutant are marked on the rings and the percentiles are shaded
percentileRose(aq_berlin1, pollutant = "pm10")

#can also customize the percentiles and smooth the plot
percentileRose(aq_berlin1, pollutant = "pm10", percentile = c(25, 50, 75, 90, 95, 99), col = "jet", smooth = TRUE)

#we see the pm10 is pretty even from all directions

###Polar Plot

#basic polar plot
polarPlot(aq_berlin1, pollutant = "pm10")

# so we see that there is only a high conc from the sw when the wind speed is higher than usual

###Polar Annulus

#make polar annuli for all data, by season, by weekday, and by hour in a day

#polarAnnulus(aq_berlin1, poll = "pm10", period = "trend", main = "Trend")
polarAnnulus(aq_berlin1, poll = "pm10", period = "season", main = "Season")

polarAnnulus(aq_berlin1, poll = "pm10", period = "weekday", main = "Weekday")

polarAnnulus(aq_berlin1, poll = "pm10", period = "hour", main = "Hour")

# shows direction of pollution over time from inner most to outermost ring

###Time Series Plots

#unfinished

###Temporal Variations

#unfinished